###############################
# VERSION 1.3
# =====================
#
#  If you use the functions defined in this programme, please cite it
#  referring to the most recent version the corresponding research note---
#
#     Goplerud, Max. (2016). "Crossing the Boundaries: An Implmentation of Two Methods for Projecting 
#     Data across Boundary Changes." Political Analysis. 24(1): 121-129.
#
#  This programme is distributed without any warranty. If any issues (or suggestions for improvement) #  arise when using the programme, feel free to contact the author (Max Goplerud) using the contact 
#  information provided in the Dataverse.
#
###############################

#Load the required packages#
require(maptools)
require(raster)
require(plyr)
require(gmp)

#A Sieve of Eratosthenes for Generating Primes, from, e.g. rosettacode.org/wiki/Sieve_of_Eratosthenes#R
sieve.of.e <- function(n){
  n <- as.integer(n)
  primes <- rep(TRUE, n)
  primes[1] <- FALSE
  last.prime <- 2L
  fsqr <- floor(sqrt(n))
  while (last.prime <= fsqr)
  {
    primes[seq.int(2L*last.prime, n, last.prime)] <- FALSE
    sel <- which(primes[(last.prime+1):(fsqr+1)])
    if(any(sel)){
      last.prime <- last.prime + min(sel)
    }else last.prime <- fsqr+1
  }
  which(primes)
}

# Function used to Calculate the Interpolated Values Given the Weights from
# interpolation algorithm. This can be used directly to 'feed in' data
# that was not included in the original shapefile. 
# It preserves all non-numeric variables and interpolates all numeric ones 
# fed to it and returns a datafarme with the appropriate 'prefix' on each variable.
weights.calculation <- function(int.weights,old.data,pfx,new.name,old.name,weights.name='transfer.prop', weights.old.name = NULL, weights.new.name = NULL){
  if (!is.null(weights.old.name)){
    names(int.weights)[match(weights.old.name,names(int.weights))] <- old.name
  }
  if (!is.null(weights.new.name)){
    names(int.weights)[match(weights.new.name,names(int.weights))] <- new.name
  }
  pfx.func <- function(x){
    if (is.null(pfx)){
      return(x)
    }
    else{
      return(paste(pfx,x,sep='.'))
    }
  }
  func.mini <- function(x,y,z){
    x$temp.col <- x[weights.name]*y
    #temp <- aggregate(x$temp.col, by=list(x[z]), FUN=sum)
    temp <- aggregate(x$temp.col, by=list(x[[z]]), FUN=sum)
    #temp <- aggregate(x$temp.col, by=c(x[z]), FUN=sum)
    names(temp) <- c(new.name, paste('pop.int',names(y),sep='.'))
    return(temp)
  }
  old.data <- old.data[!(names(old.data) == new.name)]
  int.weights <- merge(int.weights, old.data, by=old.name)
  
  s <- sapply(int.weights, class)
  temp.which <- which((s == 'numeric' | s == 'integer') & names(s) != weights.name)
  df.temp <- lapply(int.weights[temp.which], FUN=function(w){func.mini(int.weights,w, new.name)})
  merge.all <- function(x,y){merge(x,y,by=new.name)}
  #Supress Warnings is to keep there from being warnings from the merge.all duplicating names, 
  #but this is in fact fixed by the sapply on the lines below so the warnings are 'false flags'
  out <- suppressWarnings(Reduce(merge.all,df.temp))
  names(out) <- c(new.name, sapply(names(temp.which), FUN=pfx.func))  
  return(out)
}



#The Core Interpolating Algorithm
interpolation  <- function(old.shp, old.name, new.shp, new.name, pop.shp, pop.name, raster.size, 
                           prime.numbers=10^6, pfx.di='di', pfx.aw='aw', manual.extent=NULL, verbose=1, use.gdal=FALSE,
                           old.data = NULL, temp.dir = NULL, repair.pop=TRUE,
                           repair.threshold = 99){
  long.time <- proc.time()
  #Verification that valid arguments are included#
  if (is.null(pop.shp)){
    if (verbose != 0){message('No Population Data Provided.')}
    pop.shp <- old.shp
    pop.shp$fake.population <- 1
    pop.name <- 'fake.population'
  }
  if (class(old.shp)[1] %in% c('SpatialPolygonsDataFrame','character')){}else{stop('old.shp must be a SpatialPolygonDataFrame or a file path (character).')}
  if (class(new.shp)[1] %in% c('SpatialPolygonsDataFrame','character')){}else{stop('new.shp must be a SpatialPolygonDataFrame or a file path (character).')}
  if (class(pop.shp)[1] %in% c('SpatialPolygonsDataFrame','character')){}else{stop('pop.shp must be a SpatialPolygonDataFrame or a file path (character).')}
  if (class(old.name) == 'character'){}else{stop('old.name must be a character/string')}
  if (class(new.name) == 'character'){}else{stop('old.name must be a character/string')}
  if (class(pop.name) == 'character'){}else{stop('old.name must be a character/string')}
  if (is.null(old.data) | class(old.data) == 'data.frame'){}else{stop('old.data must be a data frame or left blank (old.data=NULL) to use the data in old.shp.')}
  if (verbose %in% 0:2){}else{stop('verbose must equal 0, 1, or 2.')}
  if (!is.numeric(prime.numbers) | prime.numbers < 0){stop('prime.numbers must be numeric and positive.')}
  if (!is.numeric(raster.size) | raster.size < 0){stop('raster.size must be numeric and positive.')}
  if (prime.numbers > 10^8){warning('A very large number (>10^8) of primes requested. Do you really need this many?', 'It may be advisable to split your shapefiles into smaller chunks.')}
  if (class(pfx.di) != 'character' | class(pfx.aw) != 'character'){stop('Prefixes on Interpolated Variables (pfx.di, pfx.aw) must be strings/characters.')}
  if (is.null(manual.extent) | class(manual.extent)[1] == 'extent'){}else{stop('manual.extent must an extent-class object or left blank (manual.extent=NULL.')}
  if (!is.logical(use.gdal)){stop('use.gdal must be logical')}
  if (missing(old.shp)){
    stop('Old Boundaries (old.shp) are Missing.')
  }
  if (missing(new.shp)){
    stop('New Boundaries (new.shp) are Missing.')
  }
  if (missing(pop.shp)){
    stop('Population Sub-Units (pop.shp) are Missing')
  }
  if (!is.null(old.data)){
    if (old.name %in% names(old.data) == FALSE){stop(old.name, 'is not a variable in old.data')}
  }
  if (old.name == new.name){
    stop("Please set different names for 'old.name' and 'new.name'.")
  }
  if (is.na(match(old.name, names(old.shp)))){stop('old.name not found in old.shp')}
  if (is.na(match(new.name, names(new.shp)))){stop('new.name not found in new.shp')}
  if (is.na(match(pop.name, names(pop.shp)))){stop('pop.name not found in pop.shp')}
  #Set Custom Dir for Temporary Files:
  sub.folder <- paste('int_prog',paste(sample(c(letters,0:9),10, replace=T), collapse=''), sep='_')
  if (is.null(temp.dir)){
    temp.dir <- paste(tempdir(), sub.folder,sep='/')
  }else{
    if(!file.exists(temp.dir)){stop('Invalid directory specified for temporary files.')}
    temp.dir <- paste(temp.dir, sub.folder,sep='/')
  }
  dir.create(temp.dir)
  #Load Any Remaining Strings as Shapefiles
  if (class(old.shp) == 'character'){old.shp <- readShapeSpatial(old.shp)}
  if (class(new.shp) == 'character'){new.shp <- readShapeSpatial(new.shp)}
  if (class(pop.shp) == 'character'){pop.shp <- readShapeSpatial(pop.shp)}
  
  if (length(unique(old.shp[[old.name]])) != length(old.shp)){stop('The old.shp unique IDs are not unique. Please fix this.')}
  if (length(unique(new.shp[[new.name]])) != length(new.shp)){stop('The new.shp unique IDs are not unique. Please fix this.')}
  
  all.timings <- list(rasterisation=NULL,extract=NULL,overall=NULL)
  
  #Number of Polygons in Each Shapefile
  n.old <- length(old.shp)
  n.new <- length(new.shp)
  n.pop <- length(pop.shp)
  #Warnings about 'Empty' Shapefiles#
  if(min(n.old,n.new,n.pop) == 0){stop('A shapefile contains no polygons.')}
  #Warnings about the Sub-Unit Polygon having fewer units than the Boundary Polygons#
  if((n.new > n.pop) | (n.old > n.pop)){
    warning('Fewer wards (population-weighted subunits) 
            than constituencies (polygons) in the boundary shapefiles. This may be okay ')
  }
  
  #Add a unique prime to each Old Constituency and Ward (Population Unit)
  n.primes <- sieve.of.e(prime.numbers)
  if(length(n.primes) < (n.old + n.pop)){stop('Too few primes generated. Increase prime.numbers.')}
  
  old.shp$f_PRIME_ID <- n.primes[1:n.old]
  pop.shp$f_PRIME_ID <- n.primes[(n.old+1):(n.pop+n.old)]
  
  # Looks for the existence of NAs in the population data; if any found
  # replace them with '0' and then report this to the user unless verbose
  # is turned to its lowest setting.
  if (sum(is.na(pop.shp[[pop.name]])) > 0){
    if (verbose > 0){message('NAs in the population data. Replaced with 0s for interpolation.')}
    pop.shp[[pop.name]][is.na(pop.shp[[pop.name]])] <- 0
  }
  #If new constituencies are created that have zero population, the DI method will break.
  if (sum(pop.shp[[pop.name]] == 0) > 0){
    if (verbose > 0){message('0s found in the population data. This may cause problems later, especialy if no separate population data is included. Replacing the 0s with a small number, e.g. "1", is preferable.')}
  }
  if (sum(pop.shp[[pop.name]] < 0) > 0){
    stop('Negative values found for the population. Please fix.')
  }
  # Creates a 'maximal' bounding box if not specified via manual.extent
  # This takes the largest of the provided 'maximum' dimensions and the smallest
  # of the 'minimum' ones.
  # This may create some extra 'whitespace' but should ensure that there are no issues with
  # parts of one shp being unrasterised.
  #
  # One should visually verify the shapefiles before interpolating to ensure
  # that any exclusions are comparably unimportant.
  # 
  # In general, bounding boxes should be quite similar.
  if (is.null(manual.extent)){
    bbox.old <- bbox(new.shp)
    bbox.new <- bbox(old.shp)
    bbox.pop <- bbox(pop.shp)
    bbox.adjust <- matrix(rep(NA,4), nrow=2, dimnames=list(c('x','y'),c('min','max')))
    bbox.adjust['x','min'] <- min(bbox.old['x','min'],bbox.new['x','min'],bbox.pop['x','min'])
    bbox.adjust['y','min'] <- min(bbox.old['y','min'],bbox.new['y','min'],bbox.pop['y','min'])
    bbox.adjust['x','max'] <- max(bbox.old['x','max'],bbox.new['x','max'],bbox.pop['x','max'])
    bbox.adjust['y','max'] <- max(bbox.old['y','max'],bbox.new['y','max'],bbox.pop['y','max'])
    extent.adjust <- extent(bbox.adjust)
  }else{
    extent.adjust <- manual.extent
  }
  ptm <- proc.time()
  if (verbose > 0){message('Beginning Rasterisation of Shapefiles')}
  
  if (use.gdal == TRUE){
    if (verbose > 0){message('use.gdal = TRUE; Verifying GDAL Installation')}
    require(rgdal)
    require(gdalUtils)
    gdal_setInstallation()
    valid_install <- !is.null(getOption("gdalUtils_gdalPath"))
    #If Verificiation Fails, Stop Program and Tell to Set use.gdal to false
    if (valid_install && require(rgdal)){}else{stop('GDAL installation not detected. 
                                                    Have you loaded/installed rgdal and gdalUtils. 
                                                    Set use.gdal to FALSE to continue.')}
    
    #Create Temporary Shapefiles ///with the Prime IDS// that can be called by GDAL
    str.temp.old <- paste(tempfile(tmpdir=temp.dir, pattern='tempshp'),'.shp',sep='')
    str.temp.pop <- paste(tempfile(tmpdir=temp.dir, pattern='tempshp'),'.shp',sep='')
    writeSpatialShape(old.shp[c('f_PRIME_ID')], str.temp.old)
    writeSpatialShape(pop.shp[c('f_PRIME_ID')], str.temp.pop)
    temp.shp.old <- readShapeSpatial(str.temp.old, proj4string=CRS(proj4string(old.shp)))
    temp.shp.pop <- readShapeSpatial(str.temp.pop, proj4string=CRS(proj4string(pop.shp)))
    
    temp.raster.old <- paste(tempfile(tmpdir = temp.dir, pattern='tempraster'), '.tif', sep='')
    temp.raster.pop <- paste(tempfile(tmpdir = temp.dir, pattern='tempraster'), '.tif', sep='')
    #This formulation ensures the programme runs with GDAL version below 1.8.0
    #Create a blank raster of all zeros; right extent and dimensions
    blank.raster <- raster(extent.adjust, nrow=raster.size, ncol=raster.size, vals=0)
    
    ###values(blank.raster) <- 0
    
    #Write two versions of this blank raster
    writeRaster(blank.raster, filename=temp.raster.old, overwrite=TRUE)
    writeRaster(blank.raster, filename=temp.raster.pop, overwrite=TRUE)
    #Rasterize using GDAL
    layer.old <- gsub(str.temp.old, pattern=temp.dir, replacement = '', fixed=T)
    layer.old <- gsub(layer.old, pattern='^(/|\\\\)+|\\.shp$', replacement = '')
    
    layer.pop <- gsub(str.temp.pop, pattern=temp.dir, replacement = '', fixed = T)
    layer.pop <- gsub(layer.pop, pattern='^(/|\\\\)+|\\.shp$', replacement = '')
    if (verbose == 2){message('Old Constituency Boundaries')}  
    raster.old <- gdal_rasterize(str.temp.old, dst_filename=temp.raster.old, output_Raster=TRUE, l=layer.old, a = 'f_PRIME_ID')
    if (verbose == 2){message('Ward (Sub-Constituency) Boundaries')}  
    raster.pop <- gdal_rasterize(str.temp.pop, dst_filename=temp.raster.pop, output_Raster=TRUE, l=layer.pop, a = 'f_PRIME_ID')  
    #Convert to Rasters
    rm(blank.raster)
    raster.old <- raster(raster.old, layer = 1)
    raster.pop <- raster(raster.pop, layer = 1)
    # This performs a cell-by-cell multiplication on the rasters to get the 
    # uniquely identified raster of old boundaries cross the wards.
    raster.multi <- overlay(raster.old, raster.pop, fun=function(x,y){x*y})
    # This removes the temporary files created on the disk by GDAL rasterize
    unlink(temp.dir, recursive=TRUE)
    rm(temp.shp.old, temp.shp.pop, raster.old, raster.pop)
    }else{
      #Generate Empty Rasters
      raster.old <- raster(extent.adjust, ncol=raster.size, nrow=raster.size)
      raster.pop <- raster(extent.adjust, ncol=raster.size, nrow=raster.size)
      if (verbose == 2){message('Old Boundaries')}
      #Rasterize the Shapefiles Based on the f_PRIME_ID variable added
      raster.old <- rasterize(old.shp, raster.old, 'f_PRIME_ID')
      if (verbose == 2){message('Ward (Sub-Constituency) Boundaries')}
      raster.pop <- rasterize(pop.shp, raster.pop, 'f_PRIME_ID')
      # This performs a cell-by-cell multiplication on the rasters to get the 
      # uniquely identified raster of old boundaries cross the wards.    
      raster.multi <- overlay(raster.old, raster.pop, fun=function(x,y){x*y})
      unlink(temp.dir, recursive=TRUE)
      rm(raster.old, raster.pop)
    }
  if (verbose > 0){message('Rasterisation Completed')}  
  ptm <- (proc.time() - ptm)/60
  ptm <- as.numeric(ptm[3])
  all.timings$rasterisation <- ptm
  #Run the Extract Command, i.e. find the distribution of pixels into the new boundaries  
  ptm <- proc.time()
  if (verbose > 0){message('Starting Extract Command')}
  list.zonal <- extract(raster.multi, new.shp, df=FALSE, weights=FALSE, method='simple')
  ptm <- (proc.time() - ptm)/60
  ptm <- as.numeric(ptm[3])
  if (verbose >0){message('Extract Completed: Time Elapsed - ',round(ptm, digits=1), ' minutes')}
  all.timings$extract <- ptm
  #Create functions to factorize the relevant composite numbers
  fun.distribute <- function(x){
    #temp <- as.data.frame(table(x), stringsAsFactors=FALSE)
    temp <- as.data.frame(table(x[x != 0]), stringsAsFactors=FALSE)
    names(temp) <- c('ID','Freq')
    temp <- transform(temp, ID = as.numeric(ID))
    return(temp)
  }
  fun.factorize <- function(x){
    t.len <- length(factorize(x))
    t.min <- as.numeric(min(factorize(x)))
    t.max <- as.numeric(max(factorize(x)))
    #rtn <- list(fct.len = t.len, fct.min = t.min, fct.max = t.max)
    rtn <- c(t.len, t.min, t.max)
    names(rtn) <- c('len','min','max')
    return(rtn)
  }
  
  dist.multi <- lapply(list.zonal, FUN=fun.distribute)
  dist.multi <- lapply(dist.multi, FUN=function(x){
    o <- cbind(x, t(sapply(x$ID, FUN=fun.factorize)), row.names=NULL)
    if (dim(o)[1] == 0){stop('A new constituency contained zero pixels. Increase raster size.')}else{return(o)}
  })
  
  if (unique(unlist(lapply(dist.multi, function(x){unique(x$len)}))) == 2){}else{stop('An error occured with the prime factorisation. Some cells did not have exactly two prime factors.')}
  all.multi <- fun.distribute(unlist(list.zonal))
  all.multi <- cbind(all.multi, t(sapply(all.multi$ID, FUN=fun.factorize)), row.names=NULL)
  
  fm <- all.multi$min[is.finite(all.multi$min)]
  if (length(unique(fm)) != n.old){warning('Failed to rasterise all old constituencies. Increasing raster.size is strongly encouraged.')}
  rm(fm)
  
  rasterised.pop.ids <- unique(all.multi$max)
  all.pop.ids <- unique(pop.shp$f_PRIME_ID)
  missing.pop.ids <- setdiff(all.pop.ids, rasterised.pop.ids)
  pop.percent.rasterised <- length(rasterised.pop.ids)/length(all.pop.ids)*100
  if (verbose >0){message('% of Population Shapefiles Rasteristed : ', round(pop.percent.rasterised, digits=2),'%, i.e. missing ',length(missing.pop.ids),' files')}
  
  #Pair each missing population polygon to the closest raster cell
  if (repair.pop == TRUE & pop.percent.rasterised < repair.threshold & pop.percent.rasterised != 100){
    if (verbose > 0){message('Beginning Repairs to Population')}
    repair.time <- proc.time()
    raster.vals <- values(raster.multi)
    missing.pop <- subset(pop.shp, f_PRIME_ID %in% missing.pop.ids)
    missing.matches <- lapply(missing.pop@polygons, function(x){
      #Follows the code from Raster Extract (Small) to Get the Relevant Points
      ishole <- sapply(x@Polygons, function(z){z@hole})
      xy <- lapply(x@Polygons, function(z){z@coords})
      xy <- xy[!ishole]
      k <- unique(unlist(lapply(xy, function(z){raster.multi[cellFromXY(raster.multi, z)]})))
      
      #Lookup which RASTER CELLS correspond to the missing polygons points
      #xy <- x@labpt
      #raster.vals[cellFromXY(raster.multi,xy)]
      return(k)
    })
    names(missing.matches) <- missing.pop$f_PRIME_ID
    missing.matches <- lapply(missing.matches, FUN=function(x){
      x <- x[!is.na(x) & x != 0]
      x <- sapply(x, function(y){max(as.integer(factorize(y)))})
    })
    missing.matches <- ldply(missing.matches, .fun=function(x){if (length(x) != 0){data.frame(x, length(x))}})
    names(missing.matches) <- c('FAILED_ID','MATCH_ID','COUNT_OCCS')
    repair.time <- (proc.time()-repair.time)/60
    all.timings$repair <- as.numeric(repair.time[3])
    rm(missing.pop)
    if (verbose > 0){message('Completed Repairs')}
  }else{missing.matches <- NULL}
  
  agg.pop <- aggregate(all.multi$Freq, by=list(all.multi$max), FUN=sum)
  names(agg.pop) <- c('POP_ID','Ward_Total')
  agg.multi <- aggregate(all.multi$Freq, by=list(all.multi$min, all.multi$max), FUN=sum)
  names(agg.multi) <- c('OLD_ID','POP_ID','Multi_Total')
  
  df.pop <- pop.shp@data
  #Removes All Non-Essential Data From The Population Shapefile
  df.pop <- df.pop[c('f_PRIME_ID',pop.name)]
  names(df.pop) <- c('f_PRIME_ID','pop_data')
  
  df.density <- merge(x=df.pop,y=agg.pop, by.x=c("f_PRIME_ID"), by.y=c("POP_ID"))
  
  if (!is.null(missing.matches)){
    df.missing <- merge(x=missing.matches, y=df.pop, by.x=c('FAILED_ID'), by.y=c('f_PRIME_ID'))
    df.missing$missing_contribution <- df.missing$pop_data/df.missing$COUNT_OCCS
    df.missing <- df.missing[c('MATCH_ID','FAILED_ID','missing_contribution')]
    df.missing <- aggregate(df.missing[['missing_contribution']], by=list(df.missing[['MATCH_ID']]), FUN=sum)
    names(df.missing) <- c('MATCH_ID', 'missing_contribution')
    df.density <- merge(x=df.density, all.x=TRUE, y=df.missing, by.x=c('f_PRIME_ID'), by.y=c('MATCH_ID'))
    df.density['missing_contribution'][is.na(df.density['missing_contribution'])] <- 0
    .ip <- sum(df.density$pop_data)/sum(df.pop$pop_data)*100
    df.density$pop_data <- df.density$pop_data + df.density$missing_contribution
    pop.percent.contribution <- list('initial.percent' = .ip, 'final.percent' = sum(df.density$pop_data)/sum(df.pop$pop_data)*100)  
  }else{
    pop.percent.contribution <- list('final.percent' = sum(df.density$pop_data)/sum(df.pop$pop_data)*100)
  }
  
  
  df.density['density'] <- df.density$pop_data/df.density$Ward_Total
  
  df.all <- merge(x=agg.multi, y=df.density, by.x=c("POP_ID"), by.y = c("f_PRIME_ID"))
  df.all$product <- df.all$density*df.all$Multi_Total
  
  weights.aw <- aggregate(df.all$Multi_Total, by=list(df.all$OLD_ID), FUN=sum)
  names(weights.aw) <- c('OLD_ID','weight.aw')
  weights.aw['weight.aw'] <- 1/weights.aw['weight.aw']
  
  weights.di <- aggregate(df.all$product, by=list(df.all$OLD_ID), FUN=sum)
  names(weights.di) <- c('OLD_ID','weight.di')
  weights.di['weight.di'] <- 1/weights.di['weight.di']
  if (sum(!is.finite(weights.di$weight.di)) != 0){
    stop('Some undefined DI weights produced. (At least one new constituency supposedly has zero population transfer). Thus, this method cannot be used here. Try adding "1" to the population variable.')}
  
  
  df.all <- merge(df.all, weights.aw, by=c('OLD_ID'))
  df.all <- merge(df.all, weights.di, by=c('OLD_ID'))
  
  df.all$weight.di <- df.all$weight.di*df.all$density
  
  
  #Verification#
  if((n.old - sum(df.all$weight.di*df.all$Multi_Total)) != 0){warning('Initial Verif Dasymetric Interpolation Failed')}
  if((n.old - sum(df.all$weight.aw*df.all$Multi_Total))!= 0){warning('Iniital Verif Areal Weighting Failed')}
  
  func.contributions <- function(x){
    #Merge in the Weights
    temp <- merge(x, df.all, by.x=c('min','max'), by.y=c('OLD_ID','POP_ID'))
    temp$contrib.aw <- temp$Freq*temp$weight.aw
    temp$contrib.di <- temp$Freq*temp$weight.di
    new.df <- temp[c('min','max','ID','Freq','contrib.aw','contrib.di')]
    names(new.df) <- c('OLD_ID','POP_ID','COMB_ID','size','contrib.aw','contrib.di')
    return(new.df)
  }
  
  interpol.weights <- lapply(dist.multi, FUN=func.contributions)
  #Introduce the (NEW) Names to the Interpolation Weights
  names(interpol.weights) <- new.shp[[new.name]]
  df.interpol <- ldply(interpol.weights)
  
  final.di <- aggregate(df.interpol$contrib.di, by=list(df.interpol$.id,df.interpol$OLD_ID), FUN=sum)
  final.aw <- aggregate(df.interpol$contrib.aw, by=list(df.interpol$.id,df.interpol$OLD_ID), FUN=sum)
  
  #final.di <- aggregate(contrib.di ~ .id + OLD_ID, data=df.interpol, FUN=sum)
  #final.aw <- aggregate(contrib.aw ~ .id + OLD_ID, data=df.interpol, FUN=sum)
  #final.di <- merge(final.di, old.shp@data[c(old.name,'f_PRIME_ID')], by.x=c('OLD_ID'), by.y=c('f_PRIME_ID'))
  #final.di <- final.di[c('.id',old.name,'contrib.di)]
  #names(final.di) <- c(new.name, old.name, 'contrib.di')
  
  names(final.di) <- c(new.name,'OLD_ID','transfer.prop')
  final.di <- merge(final.di, old.shp@data[c(old.name,'f_PRIME_ID')], by.x=c('OLD_ID'), by.y=c('f_PRIME_ID'))
  final.di <- final.di[c(new.name,old.name,'transfer.prop')]
  
  names(final.aw) <- c(new.name,'OLD_ID','transfer.prop')
  final.aw <- merge(final.aw, old.shp@data[c(old.name,'f_PRIME_ID')], by.x=c('OLD_ID'), by.y=c('f_PRIME_ID'))
  final.aw <- final.aw[c(new.name,old.name,'transfer.prop')]
  
  missing.seats <- c(0,0)
  verif.di <- aggregate(final.di$transfer.prop, by=list(final.di[[old.name]]), FUN=sum)
  names(verif.di) <- c(old.name, 'transfer.prop')
  if (unique(round(verif.di$transfer.prop, 10)) != 1){
    warning('Final Verif. Dasymetric Interpolation Failed')
  }
  if (sum(verif.di$transfer.prop) != n.old){
    warning('Missing Old Seats: DI - ',n.old-sum(verif.di$transfer.prop))    
    missing.seats[1] <- n.old-sum(verif.di$transfer.prop)
  }
  verif.aw <- aggregate(final.aw$transfer.prop, by=list(final.aw[[old.name]]), FUN=sum)
  names(verif.aw) <- c(old.name, 'transfer.prop')
  if (unique(round(verif.aw$transfer.prop, 10)) != 1){
    warning('Final Verif. Areal Weighting Failed')
  }
  if (sum(verif.aw$transfer.prop) != n.old){
    warning('Missing Old Seats: AW - ',n.old-sum(verif.aw$transfer.prop))    
    missing.seats[2] <- n.old-sum(verif.aw$transfer.prop)
  }
  
  if (is.null(old.data)){
    old.data <- old.shp@data
    old.data <- old.data[names(old.data) != 'f_PRIME_ID']
  }
  
  
  if (is.null(old.data)){
    old.data <- old.shp@data
    old.data <- old.data[names(old.data) != 'f_PRIME_ID']
  }
  
  int.di <- weights.calculation(final.di, old.data=old.data, pfx=pfx.di, new.name=new.name, old.name = old.name, weights.name='transfer.prop')
  int.aw <- weights.calculation(final.aw, old.data=old.data, pfx=pfx.aw, new.name=new.name, old.name = old.name, weights.name='transfer.prop')
  long.time <- (proc.time() - long.time)/60
  all.timings$overall <- as.numeric(long.time[3])
  output.final <- list('weights.di' = final.di, 'weights.aw' = final.aw, 'int.di' = int.di, 'int.aw' = int.aw, 'timings' = all.timings, 'missing.seats' = missing.seats, 'percent.pop.rasterized' = pop.percent.rasterised, 'percent.pop.verification' = pop.percent.contribution)
  return(output.final)
  }

